***********************************************************************************
*This file cleans Census employment by CZ-industry according to IFR classification*
***********************************************************************************

* For each Census year, count private-sector employment by commuting zone
* (czone) and IFR industry, and save one dataset per year. For 1970 only,
* additionally save each industry's share of czone employment in wide format.
* Requires the globals $raw_data_lmarket, $project and $clean_data_lmarket
* to be defined by the caller.
foreach year in 1970 1990 {

    * Load only the observations for the current Census year
    use if year==`year' using "$raw_data_lmarket/ipums_census.dta", clear

    * Drop institutional group quarters
    drop if gqtyped>=100 & gqtyped<=499
    * Drop Alaska and Hawaii
    drop if statefip==2 | statefip==15
    * Restrict to the working-age population (16-65) who are employed
    * salaried workers (classwkrd 22 or 23)
    keep if age>=16 & age<=65 & empstat==1 & (classwkrd==22 | classwkrd==23)
    * One unit per person, so that weighted sums below yield head counts
    gen emppriv=1

    * Recode IND1990 to the IFR industry classification using the crosswalk
    * do-file (expected to create the string variable industry_ifr)
    run "$project/xwalks/xwalks_industry/xpath_ind1990_ifr19.do"
    * List the IND1990 codes that have no IFR match before dropping them
    tab ind1990 if industry_ifr==""
    drop if industry_ifr==""

    * Map the year-specific geography to czones using the geography crosswalk.
    * joinby may match one geographic unit to several czones; the assert
    * guarantees that every unit found at least one czone.
    if `year'==1970 {
        gen ctygrp1970=cntygp97
        collapse (sum) emppriv [fw=perwt], by(industry_ifr ctygrp1970) fast
        joinby ctygrp1970 using "$project/xwalks/xwalks_geography/ctygrp1970_czone.dta", unmatched(master)
        assert czone!=.
    }
    else if `year'==1990 {
        * Build a PUMA identifier that is unique across states
        gen puma1990=statefip*10000+puma
        collapse (sum) emppriv [fw=perwt], by(industry_ifr puma1990) fast
        joinby puma1990 using "$project/xwalks/xwalks_geography/puma1990_czone.dta", unmatched(master)
        assert czone!=.
    }

    * Aggregate at the czone level, weighting each unit-czone pair by afac
    * (presumably the crosswalk allocation factor - confirm in the xwalk files)
    collapse (sum) emppriv [iw=afac], by(industry_ifr czone) fast

    save "$clean_data_lmarket/czone_`year'_emp_by_ifr19.dta", replace

    * Save share datafile for shift-share analysis (only 1970)
    if `year'==1970 {
        bys czone: egen tot_emp=total(emppriv)
        gen share_ind_=emppriv/tot_emp
        * Use the exact variable name: the abbreviation share_ind only worked
        * through varabbrev and fails with r(111) under set varabbrev off
        keep czone industry_ifr share_ind_
        * NOTE(review): after reshaping, czone-industry cells with no
        * employment are missing rather than zero - confirm downstream use
        reshape wide share_ind_, i(czone) j(industry_ifr) string
        save "$clean_data_lmarket/czone_shares_ifr19.dta", replace
    }
}
